library("tidyverse")
## ── Attaching packages ─────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.5
## ✔ tidyr   0.8.1     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ggpubr)
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library(ggrepel)

source(file.path(".","..","R","ASE_functions.R"))
source(file.path(".","..","R","PerformDiffAIAnalysisFor2Conditions.R"))
removeX <- function(DF, legitim_chrgenes){
  return(DF[DF$ensembl_gene_id %in% legitim_chrgenes$gene, ])
}
chrgenes <- read.delim('../../../data/Mus_musculus.GRCm38.68.chrgenes.txt', col.names = c('chr', 'gene'))
inTabs <- paste0("../../../data/kidney/full/",
                c("NEB", "SMARTseq10ng", "SMARTseq100pg"),
                "_processed_gene_extended2.txt")
inDF18 <- removeX(GetGatkPipelineTabs(inTabs, c(6,6,6)), chrgenes)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   ref_rep1 = col_integer(),
##   alt_rep1 = col_integer(),
##   ref_rep2 = col_integer(),
##   alt_rep2 = col_integer(),
##   ref_rep3 = col_integer(),
##   alt_rep3 = col_integer(),
##   ref_rep4 = col_integer(),
##   alt_rep4 = col_integer(),
##   ref_rep5 = col_integer(),
##   alt_rep5 = col_integer(),
##   ref_rep6 = col_integer(),
##   alt_rep6 = col_integer()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   ref_rep1 = col_integer(),
##   alt_rep1 = col_integer(),
##   ref_rep2 = col_integer(),
##   alt_rep2 = col_integer(),
##   ref_rep3 = col_integer(),
##   alt_rep3 = col_integer(),
##   ref_rep4 = col_integer(),
##   alt_rep4 = col_integer(),
##   ref_rep5 = col_integer(),
##   alt_rep5 = col_integer(),
##   ref_rep6 = col_integer(),
##   alt_rep6 = col_integer()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   ref_rep1 = col_integer(),
##   alt_rep1 = col_integer(),
##   ref_rep2 = col_integer(),
##   alt_rep2 = col_integer(),
##   ref_rep3 = col_integer(),
##   alt_rep3 = col_integer(),
##   ref_rep4 = col_integer(),
##   alt_rep4 = col_integer(),
##   ref_rep5 = col_integer(),
##   alt_rep5 = col_integer(),
##   ref_rep6 = col_integer(),
##   alt_rep6 = col_integer()
## )
## See spec(...) for full column specifications.
inDF18 
Ph <- c(0.68, 0.95, 0.98)
RESULT18_Q_humanread <- lapply(Ph, function(p){
  PerformDiffAIAnalysisFor2Conditions(inDF18, vect1CondReps=3:4, vect2CondReps=7:8, Q=p, fullOUT=T, BF=F)
})

DF18_covai <- RESULT18_Q_humanread[[1]]$deltaAIPairwise
DF18_covai <- DF18_covai[DF18_covai$group=="Condition2 01 vs 02", c("deltaAI", "MeanCov", "AI1")]

DF18_hobq <- do.call(rbind, lapply(1:length(RESULT18_Q_humanread), function(i){
  df = RESULT18_Q_humanread[[i]]$observedQuartiles
  df = df[df$condition=="Condition2" & df$ij=="01 vs 02", ]
  df[df$binNObservations >= 20, c("coverageBin", "deltaAI", "Q")]
}))

gg_covai <- ggplot() +
  geom_point(data=DF18_covai, aes(MeanCov, deltaAI), size=0.5, color="gray") +
  geom_line(data=DF18_hobq[DF18_hobq$coverageBin>=10, ], aes(coverageBin, deltaAI, color=Q)) +
  geom_point(data=DF18_hobq[DF18_hobq$coverageBin>=10, ], aes(coverageBin, deltaAI, color=Q), size=0.5) +
  labs(x = "Mean Gene Coverage", y = "Allelic Imbalance difference") +
  ggtitle("") + 
  theme_bw() +
  theme(legend.position = c(0.8, 0.8), text = element_text(size=12)) +
  xlim(0, 3500)

cowplot::save_plot(gg_covai, file="Fig_tree_with_Qs.pdf", base_height = 4)
## Warning: Removed 770 rows containing missing values (geom_point).
gg_covai_lin <- ggplot(data=DF18_hobq[DF18_hobq$coverageBin>=10, ], aes(x=coverageBin, y=deltaAI, group=Q, color=Q)) +
  geom_point(size=0.5) + 
  theme_bw() + 
  ggtitle("") + 
  xlab('Mean Gene Coverage') + ylab('Quantile(AI)') +
  theme(legend.position=c(0.8, 0.8), text = element_text(size=12)) +
  scale_x_continuous(trans='log2') +
  scale_y_continuous(trans='log2') +
  geom_smooth(method='lm', formula = y ~ offset(-0.5*x), aes(group=Q)) +
  theme(panel.background = element_rect(fill = 'white', colour = 'white'),
        legend.key = element_rect(fill = F, colour = "white")) +
  guides(color=guide_legend(override.aes=list(fill=NA)))

cowplot::save_plot(gg_covai_lin, file="Fig_parallel_lines.pdf", base_height = 4)


HQint <- sapply(1:length(Ph), function(i){RESULT18_Q_humanread[[i]]$intercepts$Condition2}$linInt)
HQint_Zscore <- rbind(data.frame(quantile = Ph, 
                                observation = HQint/HQint[2], 
                                x = "Correction constant"),
                     data.frame(quantile = Ph, 
                                observation = qnorm(Ph + (1-Ph)/2) / qnorm(0.84), 
                                x = "Binomial Z-score"))
HQint_Zscore$x <- factor(HQint_Zscore$x)

gg_qcoeff  <- ggplot(HQint_Zscore, aes(x, observation, color=x)) +
  geom_point(aes(color=x)) + 
  geom_label_repel(aes(label = quantile),
                  box.padding   = 0.35, 
                  point.padding = 0.5,
                  segment.color = 'grey50',
                  direction     = "y",
                  hjust         = 0) + 
  labs(x = "Type", y = "sd proportion") +
  ggtitle("") +
  theme_bw() +
  theme(legend.position = c(0.5, 0.9), text = element_text(size=12), legend.title = element_blank())   

Plot with coefficients:

# load the data
method_lib <- c("NEB 100ng", "SMARTseq 10 ng", "SMARTseq 100 pg")
INTC18_90_humanread <- do.call(rbind, lapply(0:2, function(i){
  C = (1:6)+i*6    
  data.frame(PerformDiffAIAnalysisFor2Conditions(inDF18, vect1CondReps=3:4, 
                                                 vect2CondReps=C, Q=0.90, fullOUT=T, BF=F)$intercepts$Condition2[, c("ij","linInt")],
             method = method_lib[i+1])
}))
INTC18_90_humanread$method <- factor(INTC18_90_humanread$method)
# plot with jitter
gg_mcoeff  <- ggplot(INTC18_90_humanread, aes(method, linInt, color=method)) +
  geom_jitter(aes(color=method)) + 
  labs(x = "Lib Preparation Method", y = "Observed correction constant") +
  ggtitle("") +
  theme_bw() +
  theme(legend.position = c(0.3, 0.8), text = element_text(size=12))
cowplot::save_plot(gg_mcoeff, file="Fig_coeff.pdf", base_height = 4)